######################################################
#### Replication Material for II Joo & Mukherjee #####
###     Manuscript ID: GINI-2019-1752.R2 (new)   #####
####                   GINI-2019-1752.R1 (old)   ##### 
####          Online Appendix Analyses           #####
####       Software Used: R Version 3.6.0        #####
######################################################


rm(list=ls())


# use for creating a log file 
# wd <- setwd("Your Directory/Joo_Mukherjee_Replication")
# (wd <- getwd())

# sink("joo_mukherjee_onlineappendix_R_output.log")


### set directory ###
wd <- setwd("Your Directory/Joo_Mukherjee_Replication")
(wd <- getwd())


### load libraries ###

library(foreign)
library(ggplot2)
library(mvtnorm)
library(ggthemes)
library(reshape2)
library(margins)



jm <- read.dta("II_Joo_Mukherjee_12.dta")

#####################
####  Figure A1  ####
#####################

par(mar=c(5,5,1,1))

coef <- c(0.1296576,
          -1.418208,
          -0.1377156,
          -1.42445)

vcov <- as.matrix(read.csv("vcov_aut.csv"))

lag <- 0
rebst <- 1
nego <- 0
cease <- 0
mob <- 1
lbd <- median(jm$logbd, na.rm=T)
lbd2 <- median(jm$logbd2, na.rm=T)
ind <- 0
terri <- 0
t <- median(jm$t1, na.rm=T)
t2 <- median(jm$t2, na.rm=T)
t3 <- median(jm$t3, na.rm=T)
censt.sim <- c(1,2,3)
mtime <- median(jm$dur_exist, na.rm=T)
sdtime <- sd(jm$dur_exist, na.rm=T)

set.seed(88110905)
B <- 5000
out <- matrix(NA, ncol=3, nrow=B)
ext <- matrix(NA, ncol=6, nrow=B)

for(i in 1:length(censt.sim)){ 
  censt <- censt.sim[i] 	
  pr.bs <- matrix(NA,B,1)
  for(b in 1:B){ 
    newcoef <-  rmvnorm(1, mean=coef, sigma=vcov)
    bs.sum <-  newcoef[1]*censt.sim[i]*(mtime) + newcoef[2]*censt.sim[i] + newcoef[3]*(mtime) + newcoef[4]
    
    bs.sum2 <-  newcoef[1]*censt.sim[i]*(mtime+sdtime) + newcoef[2]*censt.sim[i] + newcoef[3]*(mtime+sdtime) + newcoef[4]
    
    
    out[b,i] <- pnorm(bs.sum2) - pnorm(bs.sum)
    
    if(i==1){
      ext[b,1] <-pnorm(bs.sum2)
      ext[b,2] <- pnorm(bs.sum)
    } else{
      if(i==2){
        ext[b,3] <-pnorm(bs.sum2)
        ext[b,4] <- pnorm(bs.sum)
      } else{
        if(i==3){
          ext[b,5] <-pnorm(bs.sum2)
          ext[b,6] <- pnorm(bs.sum)
        }
      }
    }
    
  }
  rm(bs.sum)
  rm(bs.sum2)
}


low <- as.matrix(sort(out[,1]))
moder <- as.matrix(sort(out[,2]))
high <- as.matrix(sort(out[,3]))

sort.out <- cbind(low, moder, high)
conf95 <- sort.out[(B*0.05+1):(B*0.95),]

pdf(paste(wd, "/Figure_files/Figure_A1.pdf", sep=""),width=8,height=8,paper='special') 

boxplot(conf95, boxwex=0.5, names=c("Low", "Moderate", "High"), xlab="Central Command", 
        ylab="Change in Pr(Autocracy)", cex.lab=1.5, cex.axis=1.5, cex=1.5)
abline(h=0, lty=2, col='red')

dev.off()



#####################
####  Figure A2  ####
#####################


jm2 <- subset(jm, conflict==1)


coef <- c(0.785144,
          0.0215995,
          -0.4511128,
          -0.0391648,
          0.1130028,
          0.24926,
          0.4297826,
          -0.1161887,
          -0.1442897,
          0.0037156,
          0.4386481,
          -0.0449558,
          38.92733,
          -3.246729,
          0.0403836,
          -2.193776)
vcov <- as.matrix(read.csv("vcov_conflict.csv"))


lag <- 0
rebst <- 2
nego <- 0
cease <- 0
mob <- 1
lbd <- median(jm2$logbd, na.rm=T)
lbd2 <- median(jm2$logbd2, na.rm=T)
ind <- 0
terri <- 0
t <- median(jm2$t1, na.rm=T)
t2 <- median(jm2$t2, na.rm=T)
t3 <- median(jm2$t3, na.rm=T)
censt.sim <- c(1,2,3)
mtime <- median(jm2$dur_exist, na.rm=T)
sdtime <- sd(jm2$dur_exist, na.rm=T)

# xtprobit split split_lag centtime cent dur_exist negotiation ceasefire rstrength terr mobilize independence logbd logbd2 t1 t2 t3, re vce(cluster rebid) 
set.seed(88110905)
B <- 3000
out <- matrix(NA, ncol=3, nrow=B)
ext <- matrix(NA, ncol=6, nrow=B)

for(i in 1:length(censt.sim)){ 
  censt <- censt.sim[i] 	
  pr.bs <- matrix(NA,B,1)
  for(b in 1:B){ 
    newcoef <-  rmvnorm(1, mean=coef, sigma=vcov)
    bs.sum <- newcoef[1]*lag + newcoef[2]*censt.sim[i]*(mtime) + newcoef[3]*censt.sim[i] + newcoef[4]*(mtime) + newcoef[5]*nego + 
      newcoef[6]*cease + newcoef[7]*rebst + newcoef[8]*terri + newcoef[9]*mob + newcoef[10]*ind + newcoef[11]*lbd + newcoef[12]*lbd2 +
      newcoef[13]*t + newcoef[14]*t2 + newcoef[15]*t3 + newcoef[16]
    
    
    bs.sum2 <- newcoef[1]*lag + newcoef[2]*censt.sim[i]*(mtime+sdtime) + newcoef[3]*censt.sim[i] + newcoef[4]*(mtime+sdtime) + newcoef[5]*nego + 
      newcoef[6]*cease + newcoef[7]*rebst + newcoef[8]*terri + newcoef[9]*mob + newcoef[10]*ind +  newcoef[11]*lbd + newcoef[12]*lbd2 + 
      newcoef[13]*t + newcoef[14]*t2 + newcoef[15]*t3 + newcoef[16]
    
    out[b,i] <- pnorm(bs.sum2) - pnorm(bs.sum)
    
    if(i==1){
      ext[b,1] <-pnorm(bs.sum2)
      ext[b,2] <- pnorm(bs.sum)
    } else{
      if(i==2){
        ext[b,3] <-pnorm(bs.sum2)
        ext[b,4] <- pnorm(bs.sum)
      } else{
        if(i==3){
          ext[b,5] <-pnorm(bs.sum2)
          ext[b,6] <- pnorm(bs.sum)
        }
      }
    }
    
  }
  rm(bs.sum)
  rm(bs.sum2)
}


low <- as.matrix(sort(out[,1]))
moder <- as.matrix(sort(out[,2]))
high <- as.matrix(sort(out[,3]))

sort.out <- cbind(low, moder, high)
conf95 <- sort.out[(B*0.05+1):(B*0.95),]


pdf(paste(wd, "/Figure_files/Figure_A2.pdf", sep=""),width=8,height=8,paper='special') 

par(mar=c(5,5,2,2))

boxplot(conf95, boxwex=0.5, names=c("Low", "Moderate", "High"), xlab="Central Command", 
        ylab="Change in Pr(Split)", cex.lab=1.5, cex.axis=1.5, cex=1.5)
abline(h=0, lty=2, col='red')

dev.off()



#####################
####  Figure A3  ####
#####################

jm <- read.dta("II_Joo_Mukherjee_12.dta")


coef <- c(0.4455619,
          0.0157448,
          -0.3074662,
          -0.0343053,
          0.264737,
          0.2530055,
          0.2918376,
          -0.1933579,
          0.0040991,
          -0.2345839,
          -0.0237582,
          -0.0040464,
          -27.90933,
          2.099829,
          -0.0496096,
          -0.9857114)
vcov <- as.matrix(read.csv("vcov.csv"))


lag <- 0
rebst <- 1
nego <- 0
cease <- 0
mob <- 1
lbd <- median(jm$logbd, na.rm=T)
lbd2 <- median(jm$logbd2, na.rm=T)
ind <- 0
terri <- 0
t <- median(jm$t1, na.rm=T)
t2 <- median(jm$t2, na.rm=T)
t3 <- median(jm$t3, na.rm=T)
censt.sim <- c(1,2,3)
mtime <- median(jm$dur_exist, na.rm=T)
sdtime <- sd(jm$dur_exist, na.rm=T)

time.sim <- seq(min(jm$dur_exist), max(jm$dur_exist), by=0.5)

##### number of bootstraps ##### 
B <- 1000

set.seed(905)
out2 <- matrix(NA, ncol=3, nrow=length(time.sim))
for(i in 1:length(time.sim)){ 
  time <- time.sim[i] 	
  pr.bs <- matrix(NA,B,1)
  for(b in 1:B){ 
    newcoef <-  rmvnorm(1, mean=coef, sigma=vcov)
    bs.sum <- newcoef[1]*lag + newcoef[2]*3*time + newcoef[3]*3 + newcoef[4]*time + newcoef[5]*nego + 
      newcoef[6]*cease + newcoef[7]*rebst + newcoef[8]*terri + newcoef[9]*mob + newcoef[10]*ind + newcoef[11]*lbd + newcoef[12]*lbd2 +
      newcoef[13]*t + newcoef[14]*t2 + newcoef[15]*t3 + newcoef[16]
    bs.sum2 <- newcoef[1]*lag + newcoef[2]*1*time + newcoef[3]*1 + newcoef[4]*time + newcoef[5]*nego + 
      newcoef[6]*cease + newcoef[7]*rebst + newcoef[8]*terri + newcoef[9]*mob + newcoef[10]*ind + newcoef[11]*lbd + newcoef[12]*lbd2 +
      newcoef[13]*t + newcoef[14]*t2 + newcoef[15]*t3 + newcoef[16]
    pr.bs[b,] <- pnorm(bs.sum) - pnorm(bs.sum2)
  }
  out2[i,] <- c(mean(pr.bs, na.rm=TRUE), quantile(pr.bs, c(0.05, 0.95), na.rm=TRUE)) 
  rm(bs.sum)
}



pdf(paste(wd, "/Figure_files/Figure_A3.pdf", sep=""),width=12,height=8,paper='special') 

par(mar=c(5,5,1,1))
plot(time.sim, out2[,1], type="n", xlab="Rebel Age (years)", ylab="Change in Pr(Splits)", 
     ylim=c(-0.05,0.4),
     cex.lab=1.8, cex.axis=1.5)
lines(lowess(time.sim, out2[,1]), lty=1, lwd=2)
lines(lowess(time.sim, out2[,2]), lty=2, lwd=2)
lines(lowess(time.sim, out2[,3]), lty=2, lwd=2)
abline(h=0, lty=2, col='red', lwd=1)

dev.off()


#####################
####  Figure A4  ####
#####################

max  <- read.dta("split_maxyear.dta")

pdf(paste(wd, "/Figure_files/Figure_A4.pdf", sep=""),width=12,height=8,paper='special') 

ggplot(max) + 
  geom_histogram(aes(x = max_year, y = (..count..)/sum(..count..)), 
                 bins = 80, fill = "gray", colour = "black") +
  theme(axis.line = element_line(size = 1),
        axis.text = element_text(size = 18),
        axis.title = element_text(size = 20),
        panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
        panel.background = element_blank()) +
  xlab("Rebel Age") +
  ylab("Relative Frequency") 
dev.off()



########################
####  Figure A5a-c  ####
########################

low <- subset(max, cent==1)
mod <- subset(max, cent==2)
hi <- subset(max, cent==3)

pdf(paste(wd, "/Figure_files/Figure_A5a.pdf", sep=""),width=10,height=8,paper='special') 
plot(density(low$max_year), xlim=c(0,80), xlab="Rebel Age", main="", cex.lab=1.5)
dev.off()

pdf(paste(wd, "/Figure_files/Figure_A5b.pdf", sep=""),width=10,height=8,paper='special') 
plot(density(mod$max_year), xlim=c(0,80), xlab="Rebel Age", main="",cex.lab=1.5)
dev.off()

pdf(paste(wd, "/Figure_files/Figure_A5c.pdf", sep=""),width=10,height=8,paper='special') 
plot(density(hi$max_year), xlim=c(0,80), xlab="Rebel Age", main="",cex.lab=1.5)
dev.off()



#########################
### Table A9: T-test ####
#########################

cent <- read.dta("split_cent.dta")

low <- subset(cent, cent==1)
mod <- subset(cent, cent==2)
hi <- subset(cent, cent==3)

dat <- rbind(low, hi)
t.test(dat$max_year ~ dat$cent, alternative="two.sided")

dat <- rbind(low, mod)
t.test(dat$max_year ~ dat$cent, alternative="two.sided")

dat <- rbind(mod, hi)
t.test(dat$max_year ~ dat$cent, alternative="two.sided")


###############################
#### Table A10: Chi-2 Test ####
###############################

young <- subset(cent, max_year<(mean(cent$max_year) - sd(cent$max_year)))
old <- subset(cent, max_year> (mean(cent$max_year) + sd(cent$max_year)))
mid <- subset(cent, max_year >=(mean(cent$max_year) - sd(cent$max_year)) & max_year <= (mean(cent$max_year) + sd(cent$max_year)))

table(young$cent)
table(mid$cent)
table(old$cent)

prop.hi <- c(4,  40, 8)
prop.low <- c(11,53,11)
prop.mid <- c(11,89,24)
trial <- c((11+11+4), (53+89+40),(11+24+8))

prop.test(prop.hi, trial, p = NULL, alternative = "two.sided", correct = TRUE)
prop.test(prop.low, trial, p = NULL, alternative = "two.sided", correct = TRUE)
prop.test(prop.mid, trial, p = NULL, alternative = "two.sided", correct = TRUE)


#########################################
########## Survival Analysis ############
#########################################

jm <- read.dta("II_Joo_Mukherjee_12.dta")

##############################
#### Table A12: Cox Model ####
##############################


c1 <- coxph(Surv(dur_exist, split) ~ cent_bi, data=jm)
summary(c1)

c2 <- coxph(Surv(dur_exist, split) ~ cent_bi + mobilize + terr, data=jm)
summary(c2)

c3 <- coxph(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire, data=jm)
summary(c3)

c4 <- coxph(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire + independence + logbd + logbd2, data=jm)
summary(c4)

c5 <- coxph(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire + independence + logbd + 
              logbd2 + state_svac_reb_lag + log_os_reb + log_os_gov, data=jm)
summary(c5)

c6 <- coxph(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire + independence + logbd + 
              logbd2 + state_svac_reb_lag + log_os_reb + log_os_gov + ngroup_country +polwing, data=jm)
summary(c6)


##################################
#### Table A13: Weibull Model ####
##################################


w1 <- survreg(Surv(dur_exist, split) ~ cent_bi, data=jm, dist="weibull")
summary(w1)

w2 <- survreg(Surv(dur_exist, split) ~ cent_bi + mobilize + terr, data=jm, dist="weibull")
summary(w2)

w3 <- survreg(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire, data=jm, dist="weibull")
summary(w3)

w4 <- survreg(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire + independence + logbd + logbd2, data=jm, dist="weibull")
summary(w4)

w5 <- survreg(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire + independence + logbd + 
                logbd2 + state_svac_reb_lag + log_os_reb + log_os_gov , data=jm, dist="weibull")
summary(w5)

w6 <- survreg(Surv(dur_exist, split) ~ cent_bi + mobilize + terr + negotiation + ceasefire + independence + logbd + 
                logbd2 + state_svac_reb_lag + log_os_reb + log_os_gov + ngroup_country +polwing, data=jm, dist="weibull")
summary(w6)


###################
#### Figure A6 ####
###################

new <- with(jm,
            data.frame(cent_bi = c(0, 1), 
                       mobilize = c(1,1),
                       terr = c(0, 0),
                       negotiation = c(0,0),
                       ceasefire = c(0,0),
                       independence =c(0,0),
                       logbd = rep(median(logbd, na.rm = T),2),
                       logbd2 = rep(median(logbd2, na.rm = T),2)
            )
)


fit <- survfit(c4, newdata=new)

pdf(paste(wd, "/Figure_files/Figure_A6.pdf", sep=""),width=5,height=5,paper='special') 

ggsurvplot(fit, conf.int = F, legend.labs=c("Comm. Cent. = Weak/Moderate", "Comm. Cent. = High"),
           ggtheme = theme( axis.line = element_line(colour = "black"),
                            panel.grid.major = element_blank(),
                            panel.grid.minor = element_blank(),
                            panel.background = element_blank()), 
           ylim=c(0.7,1),  data=new, risk.table = FALSE)


dev.off()


# end of do file 

#sink()  # use for logging output